{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "import numpy as np\n",
    "import math\n",
    "from operator import itemgetter\n",
    "\n",
    "\n",
    "# 近邻数目最多为20\n",
    "K=20  \n",
    "\n",
    "# 推荐物品数目最多为10\n",
    "N=10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "data=[\n",
    "    ['A','b'],\n",
    "    ['A','d'],\n",
    "    ['B','a'],\n",
    "    ['B','b'],\n",
    "    ['B','c'],\n",
    "    ['C','a'],\n",
    "    ['C','b'],\n",
    "    ['C','d'],\n",
    "    ['D','a'],\n",
    "    ['D','e']\n",
    "]\n",
    "data=np.array(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "# user->item的映射\n",
    "# key：user\n",
    "# value：item的set\n",
    "train_data = {}\n",
    "\n",
    "for user, item in data:\n",
    "    train_data.setdefault(user,set())\n",
    "    train_data[user].add(item)\n",
    "\n",
    "# 用户数量和物品数量\n",
    "n_users = len(list(set(data[:,0])))\n",
    "n_items = len(list(set(data[:,1])))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A': {'b', 'd'}, 'B': {'a', 'b', 'c'}, 'C': {'a', 'b', 'd'}, 'D': {'a', 'e'}}"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'d': {'A', 'C'},\n",
       " 'b': {'A', 'B', 'C'},\n",
       " 'c': {'B'},\n",
       " 'a': {'B', 'C', 'D'},\n",
       " 'e': {'D'}}"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 建立item_user倒排表\n",
    "# item->set\n",
    "item_users = dict()\n",
    "for u, items in train_data.items():\n",
    "    for i in items:\n",
    "        if i not in item_users:\n",
    "            item_users[i] = set()\n",
    "        item_users[i].add(u)\n",
    "item_users"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    " # 计算用户之间共同评分的物品数,C为修正后的，count为修正前的。\n",
    "C = dict()\n",
    "count = dict()\n",
    "for i, users in item_users.items():\n",
    "    for u in users:\n",
    "        for v in users:\n",
    "            if u == v:\n",
    "                continue\n",
    "            C.setdefault(u,{})\n",
    "            C[u].setdefault(v,0)\n",
    "            # 对热门物品进行惩罚\n",
    "            C[u][v] += math.log(n_users/len(users))\n",
    "            \n",
    "\n",
    "            count.setdefault(u, {})\n",
    "            count[u].setdefault(v, 0)\n",
    "            count[u][v] += 1\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A': {'C': 0.9808292530117262, 'B': 0.28768207245178085},\n",
       " 'C': {'A': 0.9808292530117262,\n",
       "  'B': 0.5753641449035617,\n",
       "  'D': 0.28768207245178085},\n",
       " 'B': {'A': 0.28768207245178085,\n",
       "  'C': 0.5753641449035617,\n",
       "  'D': 0.28768207245178085},\n",
       " 'D': {'C': 0.28768207245178085, 'B': 0.28768207245178085}}"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "C"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A': {'C': 2, 'B': 1},\n",
       " 'C': {'A': 2, 'B': 2, 'D': 1},\n",
       " 'B': {'A': 1, 'C': 2, 'D': 1},\n",
       " 'D': {'C': 1, 'B': 1}}"
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 计算原始的余弦相似度\n",
    "def calCosineSimi():\n",
    "    user_sim = dict()\n",
    "    for u, related_users in C.items():\n",
    "        user_sim[u]={}\n",
    "        for v, cuv in related_users.items():\n",
    "            user_sim[u][v] = count[u][v] / math.sqrt(len(train_data[u]) * len(train_data[v]))\n",
    "    return user_sim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 计算修正后的余弦相似度\n",
    "def calCorrectionCosineSimi():\n",
    "    user_sim = dict()\n",
    "    for u, related_users in C.items():\n",
    "        user_sim[u]={}\n",
    "        for v, cuv in related_users.items():\n",
    "            user_sim[u][v] = cuv / math.sqrt(len(train_data[u]) * len(train_data[v]))\n",
    "    return user_sim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 计算杰卡德相似度\n",
    "def calJaccardSimi():\n",
    "    user_sim = dict()\n",
    "    for u, related_users in C.items():\n",
    "        user_sim[u]={}\n",
    "        for v, cuv in related_users.items():\n",
    "            user_sim[u][v] = count[u][v] / (len(train_data[u])+len(train_data[v])-count[u][v])\n",
    "    return user_sim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 给用户推荐N个item\n",
    "def predict(user_sim,user):\n",
    "    rank = dict()\n",
    "    interacted_items = train_data[user]\n",
    "\n",
    "    # 寻找最近的K个用户，利用它们的评分信息构造推荐列表\n",
    "    for similar_user, similarity_factor in sorted(user_sim[user].items(),\n",
    "                                                  key=itemgetter(1), reverse=True)[0:K]:\n",
    "        for movie in train_data[similar_user]:\n",
    "            if movie in interacted_items:\n",
    "                continue\n",
    "            rank.setdefault(movie, 0)\n",
    "            rank[movie] += similarity_factor\n",
    "\n",
    "    rec_list=[]\n",
    "    rec_items=sorted(rank.items(), key=itemgetter(1), reverse=True)[0:N]\n",
    "    for item,score in rec_items:\n",
    "        rec_list.append([item,score])\n",
    "    return rec_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A': {'C': 0.8164965809277261, 'B': 0.4082482904638631},\n",
       " 'C': {'A': 0.8164965809277261,\n",
       "  'B': 0.6666666666666666,\n",
       "  'D': 0.4082482904638631},\n",
       " 'B': {'A': 0.4082482904638631,\n",
       "  'C': 0.6666666666666666,\n",
       "  'D': 0.4082482904638631},\n",
       " 'D': {'C': 0.4082482904638631, 'B': 0.4082482904638631}}"
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 余弦相似度\n",
    "user_sim1=calCosineSimi()\n",
    "user_sim1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A': {'C': 0.40042186577898503, 'B': 0.11744571427554072},\n",
       " 'C': {'A': 0.40042186577898503,\n",
       "  'B': 0.19178804830118723,\n",
       "  'D': 0.11744571427554072},\n",
       " 'B': {'A': 0.11744571427554072,\n",
       "  'C': 0.19178804830118723,\n",
       "  'D': 0.11744571427554072},\n",
       " 'D': {'C': 0.11744571427554072, 'B': 0.11744571427554072}}"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 修正后的余弦相似度\n",
    "user_sim2=calCorrectionCosineSimi()\n",
    "user_sim2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A': {'C': 0.6666666666666666, 'B': 0.25},\n",
       " 'C': {'A': 0.6666666666666666, 'B': 0.5, 'D': 0.25},\n",
       " 'B': {'A': 0.25, 'C': 0.5, 'D': 0.25},\n",
       " 'D': {'C': 0.25, 'B': 0.25}}"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 杰卡德相似度\n",
    "user_sim3=calJaccardSimi()\n",
    "user_sim3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "给 A 推荐： [['a', 1.2247448713915892], ['c', 0.4082482904638631]]\n",
      "给 B 推荐： [['d', 1.0749149571305296], ['e', 0.4082482904638631]]\n",
      "给 C 推荐： [['c', 0.6666666666666666], ['e', 0.4082482904638631]]\n",
      "给 D 推荐： [['b', 0.8164965809277261], ['d', 0.4082482904638631], ['c', 0.4082482904638631]]\n"
     ]
    }
   ],
   "source": [
    "# 使用余弦相似度进行推荐\n",
    "for user in ['A','B','C','D']:\n",
    "        rec_list=predict(user_sim1,user)\n",
    "        print(\"给\",user,\"推荐：\",rec_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "给 A 推荐： [['a', 0.5178675800545257], ['c', 0.11744571427554072]]\n",
      "给 B 推荐： [['d', 0.30923376257672797], ['e', 0.11744571427554072]]\n",
      "给 C 推荐： [['c', 0.19178804830118723], ['e', 0.11744571427554072]]\n",
      "给 D 推荐： [['b', 0.23489142855108144], ['d', 0.11744571427554072], ['c', 0.11744571427554072]]\n"
     ]
    }
   ],
   "source": [
    "# 使用修正后的余弦相似度进行推荐\n",
    "for user in ['A','B','C','D']:\n",
    "        rec_list=predict(user_sim2,user)\n",
    "        print(\"给\",user,\"推荐：\",rec_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "给 A 推荐： [['a', 0.9166666666666666], ['c', 0.25]]\n",
      "给 B 推荐： [['d', 0.75], ['e', 0.25]]\n",
      "给 C 推荐： [['c', 0.5], ['e', 0.25]]\n",
      "给 D 推荐： [['b', 0.5], ['d', 0.25], ['c', 0.25]]\n"
     ]
    }
   ],
   "source": [
    "# 使用杰卡德相似度进行推荐\n",
    "for user in ['A','B','C','D']:\n",
    "        rec_list=predict(user_sim3,user)\n",
    "        print(\"给\",user,\"推荐：\",rec_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
